Vignette for ArchR: https://www.archrproject.com/bookdown/getting-started-with-archr.html
PBMC_Multiome_10xTn5_CellRangerARC_fragments.tsv.gz PBMC_Multiome_Tn5hc_50perc_CellRangerARC_fragments.tsv.gz PBMC_Multiome_Tn5hc_CellRangerARC_fragments.tsv.gz 20230912_PBMC_Multiome_scATAC_UMAP.ipynb
### Libraries
library(magrittr)
library(ArchR)
library(RWireX)
library(ggpubr)
library(purrr)
library(Seurat)
library(parallel)
library(scDblFinder)
library(qs)
source("plotCellQuality.R")
source("plotUmapQC.R")
source("plotUmapClusters.R")
source("plotUmaps.R")
### Set ArchR parameters.
addArchRThreads(threads = 30) # number of parallel threads
addArchRGenome("hg38") # genome and gene annotation ("hg19", "hg38", "mm9", "mm10")
### Set HDF5 environment variables to prevent HDF5 error later
Sys.setenv(HDF5_USE_FILE_LOCKING=FALSE)
Sys.setenv(RHDF5_USE_FILE_LOCKING=FALSE)
### Samples
samples <- c("PBMC_scATAC", "PBMC_scTurboATAC_16", "PBMC_scTurboATAC_13")
### Sample palette
sample_palette <- c("grey", "blue", "darkblue")
names(sample_palette) <- samples
### Data
inputFiles <- c("PBMC_Multiome_10xTn5_CellRangerARC_fragments.tsv.gz",
"PBMC_Multiome_Tn5hc_50perc_CellRangerARC_fragments.tsv.gz",
"PBMC_Multiome_Tn5hc_CellRangerARC_fragments.tsv.gz")
names(inputFiles) <- samples
ArrowFiles <- createArrowFiles(
inputFiles = inputFiles,
sampleNames = names(inputFiles),
minTSS = 0, #Dont set this too high because you can always increase later
minFrags = 100,
maxFrags = 5000000,
addTileMat = TRUE,
addGeneScoreMat = FALSE,
verbose = FALSE,
subThreading = FALSE
)
names(ArrowFiles) <- gsub("\\.arrow", "", ArrowFiles)
proj_list <- list()
for (sample in samples){
proj_list[[sample]] <- ArchRProject(
ArrowFiles = ArrowFiles[[sample]],
outputDirectory = paste0("ArchRProj_", sample),
copyArrows = TRUE #This is recommened so that if you modify the Arrow files you have an original copy for later usage.
)
}
proj <- ArchRProject(
ArrowFiles = ArrowFiles,
outputDirectory = "ArchRProj_all",
copyArrows = TRUE #This is recommened so that if you modify the Arrow files you have an original copy for later usage.
)
### Fragment Size distributions
fragsize_samples <- plotFragmentSizes(ArchRProj = proj, pal = sample_palette) + theme(plot.title = element_text(size=30), axis.title = element_text(size=28), axis.text = element_text(size=28), axis.text.x=element_text(angle=0), legend.title = element_text(size=0), legend.text = element_text(size=16)) +
ggtitle("Fragment size distribution")
### TSS enrichment profiles
tssprofiles_samples <- plotTSSEnrichment(ArchRProj = proj, pal = sample_palette) + theme(plot.title = element_text(size=30), axis.title = element_text(size=28), axis.text = element_text(size=28), axis.text.x=element_text(angle=0), legend.title = element_text(size=0), legend.text = element_text(size=16)) +
ggtitle("TSS enrichment profile")
options(repr.plot.width=32, repr.plot.height=10)
ggAlignPlots(fragsize_samples, tssprofiles_samples, type = "h")
log10frags_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), log10nFrags = log10(x$nFrags))}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = log10nFrags)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=30), axis.title = element_text(size=28), axis.text = element_text(size=28), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=28), legend.text = element_text(size=28)) +
scale_fill_manual(values = sample_palette) + ggtitle("Number of fragments") + xlab("") + ylab("log10(nFrags)")
tss_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), tss = x$TSSEnrichment)}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = tss)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=30), axis.title = element_text(size=28), axis.text = element_text(size=28), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=28), legend.text = element_text(size=28)) +
scale_fill_manual(values = sample_palette) + ggtitle("TSS enrichment scores") + xlab("") + ylab("TSS enrichment score")
options(repr.plot.width=20, repr.plot.height=12)
ggarrange(log10frags_samples, tss_samples, ncol = 2)
minTSS <- c(6, 6, 6)
names(minTSS) <- samples
minFrag <- c(4, 4, 4)
names(minFrag) <- samples
p_list <- lapply(samples, function(sample){plotCellQuality(proj_list[[sample]], sample, set_xlim = c(2, 6.5), set_ylim = c(0, 45),
cutoff_feature1 = minFrag[sample], cutoff_feature2 = minTSS[sample])})
names(p_list) <- samples
options(repr.plot.width=30, repr.plot.height=13)
ggarrange(plotlist = p_list, ncol = 3, nrow = 1)
p_list <- lapply(samples, function(sample){plotCellQuality(proj_list[[sample]], sample, feature2 = "log10(ReadsInTSS)",
cutoff_feature1 = 3.8, cutoff_feature2 = 3,
set_xlim = c(2, 6.5), set_ylim = c(0, 6))})
names(p_list) <- samples
options(repr.plot.width=30, repr.plot.height=13)
ggarrange(plotlist = p_list, ncol = 3, nrow = 1)
Lower cutoff for PBMC_scATAC to maintain even cell numbers.
minFrag <- c(3.8, 3.8, 3.8)
names(minFrag) <- samples
proj_list <- lapply(names(proj_list), function(x){proj_list[[x]][proj_list[[x]]$cellNames[which(getCellColData(proj_list[[x]], select = "log10(nFrags)")[,1] >= minFrag[x])], ]})
names(proj_list) <- samples
minTSS <- c(1000, 1000, 1000)
names(minTSS) <- samples
proj_list <- lapply(names(proj_list), function(x){proj_list[[x]][proj_list[[x]]$cellNames[which(getCellColData(proj_list[[x]], select = "ReadsInTSS")[,1] > minTSS[x])], ]})
names(proj_list) <- samples
for (sample in samples){
print(sample)
nFiltered <- getCellColData(proj_list[[sample]]) %>% nrow(.)
print(paste0("Filtered cells: ", nFiltered))
}
log10frags_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), log10nFrags = log10(x$nFrags))}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = log10nFrags)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=30), axis.title = element_text(size=28), axis.text = element_text(size=28), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=28), legend.text = element_text(size=28)) +
scale_fill_manual(values = sample_palette) + ggtitle("Number of fragments") + xlab("") + ylab("log10(nFrags)")
tss_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), tss = x$TSSEnrichment)}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = tss)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=30), axis.title = element_text(size=28), axis.text = element_text(size=28), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=28), legend.text = element_text(size=28)) +
scale_fill_manual(values = sample_palette) + ggtitle("TSS enrichment scores") + xlab("") + ylab("TSS enrichment score")
options(repr.plot.width=32, repr.plot.height=20)
ggarrange(log10frags_samples, tss_samples, ncol = 2)
stats <- data.frame(c("Sample", "mean(log10(nFrags))", "median(log10(nFrags))",
"mean(log10(ReadsInTSS))", "median(log10(ReadsInTSS))",
"mean(TSSEnrichment)", "median(TSSEnrichment)"))
for (sample in samples){
stats <- cbind(stats, c(sample,
getCellColData(proj_list[[sample]], select = "log10(nFrags)")[,1] %>% mean(.) %>% round(., 2),
getCellColData(proj_list[[sample]], select = "log10(nFrags)")[,1] %>% median(.) %>% round(., 2),
getCellColData(proj_list[[sample]], select = "log10(ReadsInTSS)")[,1] %>% mean(.) %>% round(., 2),
getCellColData(proj_list[[sample]], select = "log10(ReadsInTSS)")[,1] %>% median(.) %>% round(., 2),
getCellColData(proj_list[[sample]], select = "TSSEnrichment")[,1] %>% mean(.) %>% round(., 2),
getCellColData(proj_list[[sample]], select = "TSSEnrichment")[,1] %>% median(.) %>% round(., 2)))
}
colnames(stats) <- NULL
stats
p_list <- lapply(samples, function(sample){plotCellQuality(proj_list[[sample]], sample, set_xlim = c(2, 6.5), set_ylim = c(0, 45),
cutoff_feature1 = minFrag[sample], cutoff_feature2 = 4)})
names(p_list) <- samples
options(repr.plot.width=30, repr.plot.height=13)
ggarrange(plotlist = p_list, ncol = 3, nrow = 1)
p_list <- lapply(samples, function(sample){plotCellQuality(proj_list[[sample]], sample, feature2 = "log10(ReadsInTSS)",
cutoff_feature1 = 3.8, cutoff_feature2 = 3,
set_xlim = c(2, 6.5), set_ylim = c(0, 6))})
names(p_list) <- samples
options(repr.plot.width=30, repr.plot.height=13)
ggarrange(plotlist = p_list, ncol = 3, nrow = 1)
proj_list <- lapply(proj_list, function(x){addIterativeLSI(
ArchRProj = x,
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = 1:30,
force = TRUE
)})
IterativeLSI_list <- lapply(proj_list, function(x){getReducedDims(x, "IterativeLSI", returnMatrix = FALSE)})
options(repr.plot.width=21, repr.plot.height=5)
par(mfrow=c(1, 3))
for (x in samples){
plot(IterativeLSI_list[[x]]$svd$d, bty='l', xlab='LSI components', main=x, ylab='SVD deviation', pch=16, cex.lab=1.2, cex.axis=1.1)
plot(IterativeLSI_list[[x]]$svd$d, bty='l', xlab='LSI components', ylab='SVD deviation', pch=16, cex.lab=1.2, cex.axis=1.1, ylim=c(0,1500))
plot(IterativeLSI_list[[x]]$corToDepth$none, bty='l', xlab='LSI components', ylab='Correlation to sequencing depth', pch=16, cex.lab=1.2, cex.axis=1.1)
}
LSIcomponents <- list(2:17, 2:17, 2:17)
names(LSIcomponents) <- samples
proj_list <- lapply(names(proj_list), function(x){addIterativeLSI(
ArchRProj = proj_list[[x]],
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = LSIcomponents[[x]],
force = TRUE
)})
names(proj_list) <- samples
proj_list <- lapply(proj_list, function(x){addUMAP(
ArchRProj = x,
reducedDims = "IterativeLSI",
name = "UMAP",
nNeighbors = 30,
minDist = 0.5,
metric = "cosine",
force = TRUE
)})
amulet_res <- lapply(samples, function(sample){res <- amulet(inputFiles[sample],
barcodes = rownames(proj_list[[sample]]@cellColData) %>% gsub(paste0(sample, "#"), "", .),
regionsToExclude = GRanges(c("M","chrM","MT","X","Y","chrX","chrY"), IRanges(1L, width=10^8)), # excluding repeats, as well as sex and mitochondrial chromosomes
uniqueFrags = TRUE, # only use unique fragments
maxFragSize = 1000L, # maximum fragment size to consider
removeHighOverlapSites = TRUE); # remove sites that have more than two reads in unexpectedly many cells
rownames(res) <- paste0(sample, "#", rownames(res));
colnames(res) <- paste0("Amulet_", colnames(res));
qsave(res, paste0("Amulet_DoubletDetection_", sample, ".qs"));
return(res)})
for (sample in samples){
proj_list[[sample]]@cellColData <- cbind(proj_list[[sample]]@cellColData, amulet_res[[sample]][match(rownames(proj_list[[sample]]@cellColData), rownames(amulet_res[[sample]])),])
proj_list[[sample]]@cellColData$Amulet_doublet <- proj_list[[sample]]@cellColData$Amulet_q.value <= 1e-5
}
plot_list <- list()
for (sample in samples){
plot_list[["1"]][[sample]] <- ggplot(as.data.frame(proj_list[[sample]]@cellColData), aes(x = log10(Amulet_nFrags), y = log10(Amulet_q.value))) + ggpointdensity::geom_pointdensity() +
ggtitle(sample) +
ggpubr::stat_cor(p.accuracy = 0.001, r.accuracy = 0.01, method = "spearman", size = 5) +
theme_minimal() + theme(text = element_text(size = 15))
plot_list[["2"]][[sample]] <- ggplot(as.data.frame(proj_list[[sample]]@cellColData), aes(x = log10(Amulet_nFrags), y = log10(Amulet_p.value))) + ggpointdensity::geom_pointdensity() +
ggtitle(sample) +
ggpubr::stat_cor(p.accuracy = 0.001, r.accuracy = 0.01, method = "spearman", size = 5) +
theme_minimal() + theme(text = element_text(size = 15))
}
options(repr.plot.width=6*length(samples), repr.plot.height=5)
ggarrange(plotlist = plot_list[["1"]], ncol = length(samples))
ggarrange(plotlist = plot_list[["2"]], ncol = length(samples))
umap_list <- list()
for (sample in samples){
umap_list[[sample]] <- plotEmbedding(proj_list[[sample]], embedding = "UMAP",
colorBy = "cellColData", name = "Amulet_doublet",
pal = c("FALSE" = "grey", "TRUE" = "darkred"), size = 0.5)
}
options(repr.plot.width=25, repr.plot.height=10)
ggarrange(plotlist = umap_list, ncol=length(samples), nrow=1)
for (sample in samples){
print(sample)
print(table(proj_list[[sample]]@cellColData$Amulet_doublet))
print(round(sum(proj_list[[sample]]@cellColData$Amulet_doublet) / nrow(proj_list[[sample]]@cellColData), 2))
}
proj_list <- lapply(names(proj_list), function(x){proj_list[[x]][proj_list[[x]]$cellNames[!proj_list[[x]]$Amulet_doublet]]})
names(proj_list) <- samples
log10frags_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), log10nFrags = log10(x$nFrags))}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = log10nFrags)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=14), axis.title = element_text(size=12), axis.text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=12), legend.text = element_text(size=12)) +
scale_fill_manual(values = sample_palette) + ggtitle("Number of fragments") + xlab("") + ylab("log10(nFrags)")
tss_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), tss = x$TSSEnrichment)}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = tss)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=14), axis.title = element_text(size=12), axis.text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=12), legend.text = element_text(size=12)) +
scale_fill_manual(values = sample_palette) + ggtitle("TSS enrichment scores") + xlab("") + ylab("TSS enrichment score")
options(repr.plot.width=10, repr.plot.height=8)
ggarrange(log10frags_samples, tss_samples, ncol = 2)
p_list <- lapply(samples, function(sample){plotCellQuality(proj_list[[sample]], sample, set_xlim = c(2, 6.5), set_ylim = c(0, 45),
cutoff_feature1 = minFrag[sample], cutoff_feature2 = 4)})
names(p_list) <- samples
options(repr.plot.width=30, repr.plot.height=13)
ggarrange(plotlist = p_list, ncol = 3, nrow = 1)
proj_list <- lapply(proj_list, function(x){addIterativeLSI(
ArchRProj = x,
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = 1:30,
force = TRUE
)})
IterativeLSI_list <- lapply(proj_list, function(x){getReducedDims(x, "IterativeLSI", returnMatrix = FALSE)})
options(repr.plot.width=21, repr.plot.height=5)
par(mfrow=c(1, length(samples)))
for (x in samples){
plot(IterativeLSI_list[[x]]$svd$d, bty='l', xlab='LSI components', main=x, ylab='SVD deviation', pch=16, cex.lab=1.2, cex.axis=1.1)
plot(IterativeLSI_list[[x]]$svd$d, bty='l', xlab='LSI components', ylab='SVD deviation', pch=16, cex.lab=1.2, cex.axis=1.1, ylim=c(0,1500))
plot(IterativeLSI_list[[x]]$corToDepth$none, bty='l', xlab='LSI components', ylab='Correlation to sequencing depth', pch=16, cex.lab=1.2, cex.axis=1.1)
}
LSIcomponents <- list(2:20, 2:20, 2:20)
names(LSIcomponents) <- samples
proj_list <- lapply(names(proj_list), function(x){addIterativeLSI(
ArchRProj = proj_list[[x]],
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = LSIcomponents[[x]],
force = TRUE
)})
names(proj_list) <- samples
proj_list <- lapply(proj_list, function(x){addUMAP(
ArchRProj = x,
reducedDims = "IterativeLSI",
name = "UMAP",
nNeighbors = 30,
minDist = 0.5,
metric = "cosine",
force = TRUE
)})
umap_list <- lapply(names(proj_list), function(x){plotUmapQC(proj_list[[x]], "UMAP", "Sample", x, doubletScore=FALSE, coloringPalette=sample_palette)})
names(umap_list) <- names(proj_list)
options(repr.plot.width=25, repr.plot.height=20*length(samples))
lapply(umap_list, function(x){ggpubr::ggarrange(plotlist=x, ncol=3, nrow=2)}) %>% ggpubr::ggarrange(plotlist=., ncol=1, nrow=length(samples))
for (sample in samples){
print(sample)
nFiltered <- getCellColData(proj_list[[sample]]) %>% nrow(.)
print(paste0("Filtered cells: ", nFiltered))
}
proj_list <- lapply(names(proj_list), function(x){proj_list[[x]][proj_list[[x]]$cellNames[which(getCellColData(proj_list[[x]], select = "BlacklistRatio")[,1] < 0.01)], ]})
names(proj_list) <- samples
for (sample in samples){
print(sample)
nFiltered <- getCellColData(proj_list[[sample]]) %>% nrow(.)
print(paste0("Filtered cells: ", nFiltered))
}
proj_list <- lapply(names(proj_list), function(x){proj_list[[x]][proj_list[[x]]$cellNames[which(getCellColData(proj_list[[x]], select = "NucleosomeRatio")[,1] < 3)], ]})
names(proj_list) <- samples
for (sample in samples){
print(sample)
nFiltered <- getCellColData(proj_list[[sample]]) %>% nrow(.)
print(paste0("Filtered cells: ", nFiltered))
}
log10frags_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), log10nFrags = log10(x$nFrags))}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = log10nFrags)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=14), axis.title = element_text(size=12), axis.text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=12), legend.text = element_text(size=12)) +
scale_fill_manual(values = sample_palette) + ggtitle("Number of fragments") + xlab("") + ylab("log10(nFrags)")
tss_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = factor(x$Sample, levels = samples), tss = x$TSSEnrichment)}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = tss)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=14), axis.title = element_text(size=12), axis.text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=12), legend.text = element_text(size=12)) +
scale_fill_manual(values = sample_palette) + ggtitle("TSS enrichment scores") + xlab("") + ylab("TSS enrichment score")
options(repr.plot.width=10, repr.plot.height=8)
ggarrange(log10frags_samples, tss_samples, ncol = 2)
p_list <- lapply(samples, function(sample){plotCellQuality(proj_list[[sample]], sample, set_xlim = c(2, 6.5), set_ylim = c(0, 45),
cutoff_feature1 = minFrag[sample], cutoff_feature2 = 4)})
names(p_list) <- samples
options(repr.plot.width=30, repr.plot.height=13)
ggarrange(plotlist = p_list, ncol = 3, nrow = 1)
proj_list <- lapply(proj_list, function(x){addIterativeLSI(
ArchRProj = x,
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = 1:30,
force = TRUE
)})
IterativeLSI_list <- lapply(proj_list, function(x){getReducedDims(x, "IterativeLSI", returnMatrix = FALSE)})
options(repr.plot.width=21, repr.plot.height=5)
par(mfrow=c(1, length(samples)))
for (x in samples){
plot(IterativeLSI_list[[x]]$svd$d, bty='l', xlab='LSI components', main=x, ylab='SVD deviation', pch=16, cex.lab=1.2, cex.axis=1.1)
plot(IterativeLSI_list[[x]]$svd$d, bty='l', xlab='LSI components', ylab='SVD deviation', pch=16, cex.lab=1.2, cex.axis=1.1, ylim=c(0,1500))
plot(IterativeLSI_list[[x]]$corToDepth$none, bty='l', xlab='LSI components', ylab='Correlation to sequencing depth', pch=16, cex.lab=1.2, cex.axis=1.1)
}
LSIcomponents <- list(2:20, 2:20, 2:20)
names(LSIcomponents) <- samples
proj_list <- lapply(names(proj_list), function(x){addIterativeLSI(
ArchRProj = proj_list[[x]],
useMatrix = "TileMatrix",
name = "IterativeLSI",
iterations = 2,
clusterParams = list( #See Seurat::FindClusters
resolution = c(0.2),
sampleCells = 10000,
n.start = 10
),
varFeatures = 25000,
dimsToUse = LSIcomponents[[x]],
force = TRUE
)})
names(proj_list) <- samples
proj_list <- lapply(proj_list, function(x){addUMAP(
ArchRProj = x,
reducedDims = "IterativeLSI",
name = "UMAP",
nNeighbors = 30,
minDist = 0.5,
metric = "cosine",
force = TRUE
)})
umap_list <- lapply(names(proj_list), function(x){plotUmapQC(proj_list[[x]], "UMAP", "Sample", x, doubletScore=FALSE, coloringPalette=sample_palette)})
names(umap_list) <- names(proj_list)
options(repr.plot.width=25, repr.plot.height=20*length(samples))
lapply(umap_list, function(x){ggpubr::ggarrange(plotlist=x, ncol=3, nrow=2)}) %>% ggpubr::ggarrange(plotlist=., ncol=1, nrow=length(samples))
umap_clusters_filtered <- lapply(proj_list, function(x){plotUmapClusters(x, reducedDims="IterativeLSI",
embedding="UMAP", resolutions = c(0.1, 0.2, 0.3))})
options(repr.plot.width=28, repr.plot.height=10*length(samples))
lapply(umap_clusters_filtered, function(x){ggpubr::ggarrange(plotlist=x, ncol=3, nrow=1)}) %>% ggpubr::ggarrange(plotlist=., ncol=1, nrow=length(samples))
proj_list <- lapply(names(proj_list), function(x){addClusters(proj_list[[x]],
reducedDims = "IterativeLSI", dimsToUse = LSIcomponents[[x]],
method = "Seurat", name = "Clusters", resolution = 0.3)})
names(proj_list) <- samples
lapply(proj_list, function(x){table(x$Clusters)})
umap_list <- lapply(proj_list, function(x){plotUmaps(x, embeddings = c("UMAP"), coloringLayer = "Clusters")[[1]]})
names(umap_list) <- samples
options(repr.plot.width=8*length(samples), repr.plot.height=10)
ggarrange(plotlist = umap_list, ncol=length(samples), nrow=1)
log10frags_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = x$Clusters, log10nFrags = log10(x$nFrags), Sample = x$Sample)}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = log10nFrags)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + facet_wrap(~Sample) + theme(legend.position = "none", plot.title = element_text(size=14), axis.title = element_text(size=12), axis.text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=12), legend.text = element_text(size=12), strip.text = element_text(size = 12)) +
scale_fill_manual(values = paletteDiscrete(paste0("C", 1:15))) + ggtitle("Number of fragments") + xlab("") + ylab("log10(nFrags)")
tss_samples <- ggplot(lapply(proj_list, function(x){data.frame(AnalysesGroups = x$Clusters, tss = x$TSSEnrichment, Sample = x$Sample)}) %>% do.call(rbind, .), aes(x = AnalysesGroups, y = tss)) + geom_violin(aes(fill = AnalysesGroups), alpha = 0.8) + geom_boxplot(alpha = 0.001) +
theme_minimal() + theme(legend.position = "none", plot.title = element_text(size=14), axis.title = element_text(size=12), axis.text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=1, hjust=1),
legend.title = element_text(size=12), legend.text = element_text(size=12), strip.text = element_text(size = 12)) +
scale_fill_manual(values = paletteDiscrete(paste0("C", 1:15))) + ggtitle("TSS enrichment scores") + xlab("") + ylab("TSS enrichment score") + facet_wrap(~Sample)
options(repr.plot.width=6*length(samples), repr.plot.height=10)
ggarrange(log10frags_samples, tss_samples, nrow = 2)
### Save ArchRProject
for (sample in samples){
saveArchRProject(ArchRProj = proj_list[[sample]], outputDirectory = paste0("ArchRProj_", sample), load = FALSE)
}
lapply(names(proj_list), function(x){all(rownames(proj_list[[x]]@cellColData) == rownames(proj_list[[x]]@embeddings$UMAP$df)) %>% print(.);
write.csv(cbind(proj_list[[x]]@embeddings$UMAP$df, proj_list[[x]]@cellColData), paste0("RawData_", x, ".csv"))})
#
Isabelle Seufert
Division of Chromatin Networks, DKFZ
12.09.2023
sessionInfo()